We are going to plot the population density distribution of Kenya based on the 2019 census using Python's GeoPandas module and other dependencies. All data (csv and shapefiles) were acquired online from multiple sources (see reference). I will illustrate all the steps from getting the required modules, importing, exploring, cleaning and merging data. Finally, the data will be used to develop a choropleth map.
# Set a folder where data is stored and my script will be based to help in file organisation
import os
os.chdir(r'D:\Programs\Python\Jupiter notebook\Population density of kenya')
os.getcwd()
'D:\\Programs\\Python\\Jupiter notebook\\Population density of kenya'
import os
import geopandas as gpd # Handle geodataframe
import pandas as pd # Handle csv file
import matplotlib.pyplot as plt # Basic plots
import numpy as np # Array manipulations
import contextily as ctx # Adding basemaps
from matplotlib_scalebar.scalebar import ScaleBar # For scalebar
import folium # Enables interactive mapping
# Read kenyan counties shapefile and convert to geodataframe
counties = gpd.read_file(r'D:\Programs\Python\Jupiter notebook\Population density of kenya\County\County.shp')
# Data type
print(type(counties))
# Have a glimpse of the data
print(counties.head())
<class 'geopandas.geodataframe.GeoDataFrame'> OBJECTID AREA PERIMETER COUNTY3_ COUNTY3_ID COUNTY Shape_Leng \ 0 1 5.677 15.047 2.0 1.0 Turkana 15.046838 1 2 6.177 11.974 3.0 2.0 Marsabit 11.974165 2 3 2.117 7.355 4.0 3.0 Mandera 7.355154 3 4 4.610 9.838 5.0 4.0 Wajir 9.838408 4 5 0.740 5.030 6.0 5.0 West Pokot 5.030271 maternal_d geometry 0 3012 POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... 1 1524 POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... 2 2136 POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... 3 581 POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... 4 625 POLYGON ((35.12745 2.62271, 35.12762 2.62302, ...
# Subset data, select only required column
counties = counties[['COUNTY', 'geometry']]
counties.head()
| COUNTY | geometry | |
|---|---|---|
| 0 | Turkana | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... |
| 1 | Marsabit | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... |
| 2 | Mandera | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... |
| 3 | Wajir | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... |
| 4 | West Pokot | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... |
# Read county population data ]
pop_data = pd.read_csv('kenyanpopulation.csv')
pop_data.head()
| County | Population | |
|---|---|---|
| 0 | Mombasa | 1208333 |
| 1 | Kwale | 866820 |
| 2 | Kilifi | 1453787 |
| 3 | Tana River | 315943 |
| 4 | Lamu | 143920 |
# Attributes of data
print(counties.shape, '\n', pop_data.shape)
(47, 2) (47, 2)
# Make column names similar
counties = counties.rename(columns = {'COUNTY':'County'})
counties.head()
| County | geometry | |
|---|---|---|
| 0 | Turkana | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... |
| 1 | Marsabit | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... |
| 2 | Mandera | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... |
| 3 | Wajir | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... |
| 4 | West Pokot | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... |
# Check data similarity
# Iterate over counties's County column (stores name of county) and cross check with popoulation data to determine
# whether they match. Geopandas items() function is used to iterate over series data frame usually contained in columns.
for index, row in counties['County'].items():
# Create a list of county names from population data.
if row in pop_data['County'].tolist(): # Item in list
pass
else: # Item not in list
print(row, ' not in population data.')
# Spatial join - Append the attribute of both tables based on the common field , County.
population = pd.merge(counties, pop_data, how='inner', on='County')
population.head()
| County | geometry | Population | |
|---|---|---|---|
| 0 | Turkana | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... | 926976 |
| 1 | Marsabit | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... | 459785 |
| 2 | Mandera | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... | 867457 |
| 3 | Wajir | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... | 781263 |
| 4 | West Pokot | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... | 621241 |
# Reoder the column names to make geometry be the last item
# First lets create a list of columns
column_list = list(population.columns)
column_list
# Swap two columns: geometry at index 1 with population at index 2
column_list[1], column_list[2] = column_list[2], column_list[1]
# Reassign the new colum order to population
population = population[column_list]
population.head(10)
| County | Population | geometry | |
|---|---|---|---|
| 0 | Turkana | 926976 | POLYGON ((35.79593 5.34449, 35.79659 5.34468, ... |
| 1 | Marsabit | 459785 | POLYGON ((36.05061 4.45622, 36.23184 4.45124, ... |
| 2 | Mandera | 867457 | POLYGON ((41.62133 3.97673, 41.62272 3.97860, ... |
| 3 | Wajir | 781263 | POLYGON ((39.31812 3.47197, 39.31956 3.47168, ... |
| 4 | West Pokot | 621241 | POLYGON ((35.12745 2.62271, 35.12762 2.62302, ... |
| 5 | Samburu | 310327 | POLYGON ((36.73652 2.51379, 36.73706 2.51398, ... |
| 6 | Isiolo | 268002 | POLYGON ((37.94529 1.26288, 38.33966 1.57742, ... |
| 7 | Baringo | 666763 | POLYGON ((35.70707 1.42160, 35.70693 1.42253, ... |
| 8 | Keiyo-Marakwet | 454480 | POLYGON ((35.70280 1.24649, 35.70271 1.24591, ... |
| 9 | Trans Nzoia | 990341 | POLYGON ((34.82016 1.25983, 34.82024 1.26042, ... |
# Plot the counties each with varying colour
population.plot(figsize=(15, 10), column = 'County', edgecolor="black")
plt.title('Kenyan Counties Geographic Coordinate')
plt.show()
# Interactive map
population.explore("County", legend=False)
# View projection of the map
population.crs
<Geographic 2D CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84" ...> Name: WGS 84 Axis Info [ellipsoidal]: - lon[east]: Longitude (Degree) - lat[north]: Latitude (Degree) Area of Use: - undefined Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Reproject data
# Convert to Web Mercator that does not distort distance along and uses meters as units
population.to_crs(epsg=3857, inplace = True)
# Check
population.crs
<Derived Projected CRS: EPSG:3857> Name: WGS 84 / Pseudo-Mercator Axis Info [cartesian]: - X[east]: Easting (metre) - Y[north]: Northing (metre) Area of Use: - name: World between 85.06°S and 85.06°N. - bounds: (-180.0, -85.06, 180.0, 85.06) Coordinate Operation: - name: Popular Visualisation Pseudo-Mercator - method: Popular Visualisation Pseudo Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# Plot counties with new coordinate system
population.plot(figsize=(15, 10), # Plot size
column = 'County', # Colour by county
edgecolor="black", # County border colour
linewidth=0.3) # Borderline size
plt.title('Kenyan Counties Projected Coordinate System') # Title
plt.show()
# Calculate area in square kilometers of each county and store values a new column called Area
# Round values to 2 decimal places
population['Area'] = round( population.area / 1000000, 2 )
# View data
population.head(10)
| County | Population | geometry | Area | |
|---|---|---|---|---|
| 0 | Turkana | 926976 | POLYGON ((3984784.159 595810.080, 3984858.473 ... | 70486.71 |
| 1 | Marsabit | 459785 | POLYGON ((4013135.502 496564.770, 4033310.206 ... | 76653.17 |
| 2 | Mandera | 867457 | POLYGON ((4633264.866 443043.934, 4633420.288 ... | 26284.41 |
| 3 | Wajir | 781263 | POLYGON ((4376872.567 386734.671, 4377033.085 ... | 57156.36 |
| 4 | West Pokot | 621241 | POLYGON ((3910369.315 292060.939, 3910388.424 ... | 9180.42 |
| 5 | Samburu | 310327 | POLYGON ((4089490.148 279923.099, 4089550.449 ... | 21233.90 |
| 6 | Isiolo | 268002 | POLYGON ((4224050.742 140594.566, 4267951.925 ... | 25530.31 |
| 7 | Baringo | 666763 | POLYGON ((3974892.358 158268.119, 3974877.496 ... | 10871.08 |
| 8 | Keiyo-Marakwet | 454480 | POLYGON ((3974417.175 138769.247, 3974407.408 ... | 3038.97 |
| 9 | Trans Nzoia | 990341 | POLYGON ((3876162.471 140254.976, 3876170.964 ... | 2503.40 |
# Calculate population density and assign a new varaible called Pop Density(People / sq.km)
population['Pop Density(People/Sq.Km)'] = population['Population'] / population['Area']
# View the first 10 and the last 10 observations
print(population.head(10), '\n\n',population.tail(10))
County Population \
0 Turkana 926976
1 Marsabit 459785
2 Mandera 867457
3 Wajir 781263
4 West Pokot 621241
5 Samburu 310327
6 Isiolo 268002
7 Baringo 666763
8 Keiyo-Marakwet 454480
9 Trans Nzoia 990341
geometry Area \
0 POLYGON ((3984784.159 595810.080, 3984858.473 ... 70486.71
1 POLYGON ((4013135.502 496564.770, 4033310.206 ... 76653.17
2 POLYGON ((4633264.866 443043.934, 4633420.288 ... 26284.41
3 POLYGON ((4376872.567 386734.671, 4377033.085 ... 57156.36
4 POLYGON ((3910369.315 292060.939, 3910388.424 ... 9180.42
5 POLYGON ((4089490.148 279923.099, 4089550.449 ... 21233.90
6 POLYGON ((4224050.742 140594.566, 4267951.925 ... 25530.31
7 POLYGON ((3974892.358 158268.119, 3974877.496 ... 10871.08
8 POLYGON ((3974417.175 138769.247, 3974407.408 ... 3038.97
9 POLYGON ((3876162.471 140254.976, 3876170.964 ... 2503.40
Pop Density(People/Sq.Km)
0 13.151075
1 5.998252
2 33.002719
3 13.668873
4 67.670216
5 14.614696
6 10.497405
7 61.333649
8 149.550670
9 395.598386
County Population \
37 Kiambu 2417735
38 Machakos 1421932
39 Kajiado 1117840
40 Nairobi 4397073
41 Makueni 987653
42 Lamu 143920
43 Kilifi 1453787
44 Taita Taveta 340671
45 Kwale 866820
46 Mombasa 1208333
geometry Area \
37 POLYGON ((4069044.517 -102417.883, 4070802.144... 2591.95
38 POLYGON ((4150102.588 -87296.167, 4150172.656 ... 6265.90
39 POLYGON ((4044673.845 -116821.486, 4061966.448... 21999.28
40 POLYGON ((4130256.139 -140465.149, 4130260.385... 709.97
41 POLYGON ((4179969.508 -171202.248, 4180046.370... 8052.21
42 MULTIPOLYGON (((4532158.636 -255558.271, 45322... 6220.58
43 MULTIPOLYGON (((4449273.720 -377928.588, 44492... 12622.20
44 POLYGON ((4290925.498 -331857.644, 4291023.592... 17329.02
45 MULTIPOLYGON (((4386710.862 -524036.154, 43866... 8341.56
46 MULTIPOLYGON (((4406502.956 -458786.282, 44058... 229.12
Pop Density(People/Sq.Km)
37 932.786126
38 226.931805
39 50.812572
40 6193.322253
41 122.656140
42 23.136106
43 115.176990
44 19.658988
45 103.915814
46 5273.799756
# Plot a choropleth map of population density
population.plot(figsize=(15, 10), # Plot size
column = 'Pop Density(People/Sq.Km)', # Colour by population density
edgecolor="black", # County border colour
cmap='Spectral_r', # Colour scheme
legend=True) # Add legend
plt.title('Population Density in Kenya 2019 Census') # Title
plt.show()
# The above map does not show good visualisation as features are not clearly discernible.
# Lets scale the density column using their logarithmic values.
# Add a column of logarithm values of population density
population['Density Scaled'] = np.log10(population['Pop Density(People/Sq.Km)'])
population.head(5)
| County | Population | geometry | Area | Pop Density(People/Sq.Km) | Density Scaled | |
|---|---|---|---|---|---|---|
| 0 | Turkana | 926976 | POLYGON ((3984784.159 595810.080, 3984858.473 ... | 70486.71 | 13.151075 | 1.118961 |
| 1 | Marsabit | 459785 | POLYGON ((4013135.502 496564.770, 4033310.206 ... | 76653.17 | 5.998252 | 0.778025 |
| 2 | Mandera | 867457 | POLYGON ((4633264.866 443043.934, 4633420.288 ... | 26284.41 | 33.002719 | 1.518550 |
| 3 | Wajir | 781263 | POLYGON ((4376872.567 386734.671, 4377033.085 ... | 57156.36 | 13.668873 | 1.135733 |
| 4 | West Pokot | 621241 | POLYGON ((3910369.315 292060.939, 3910388.424 ... | 9180.42 | 67.670216 | 1.830398 |
# Plot map with reclassified density
fig, ax = plt.subplots(figsize=(15, 10),) # Plot size
population.plot(ax=ax, # Axis
column = 'Density Scaled', # Colour by population density
edgecolor="black", # County border colour
cmap='Spectral_r', # Colour scheme reversed
legend=True) # Add legend
# Title
plt.title('KENYAN POPULATION DENSITY 2019 CENSUS')
# Add base map
ctx.add_basemap(ax=ax,
source=ctx.providers.Esri.WorldShadedRelief)
# Annotations
ax.annotate('@kech_cole, 2023',xy=(0.36, .078),
xycoords='figure fraction', horizontalalignment='left',
verticalalignment='top', fontsize=12, color='Black')
# North arrow
x, y, arrow_length = 0.95, 1.0, 0.1
ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
arrowprops=dict(facecolor='black', width=5, headwidth=15),
ha='center', va='center', fontsize=20,
xycoords=ax.transAxes)
# Scale Bar
ax.add_artist(ScaleBar(1, location='lower left', length_fraction=0.3))
plt.show()